{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "67c1e6b1",
   "metadata": {},
   "source": [
    "\n",
    "\n",
    "下面的例子将展示词向量标准工具包——gensim提供的词嵌入，并展示词嵌入如何表示词的相似度。\n",
    "<!-- https://nlp.stanford.edu/projects/glove/ -->"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "028004b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting numpy\n",
      "  Using cached numpy-2.2.5-cp312-cp312-win_amd64.whl.metadata (60 kB)\n",
      "Using cached numpy-2.2.5-cp312-cp312-win_amd64.whl (12.6 MB)\n",
      "Installing collected packages: numpy\n",
      "Successfully installed numpy-2.2.5\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "19faab2c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found existing installation: gensim 4.3.3\n",
      "Uninstalling gensim-4.3.3:\n",
      "  Successfully uninstalled gensim-4.3.3\n",
      "Found existing installation: numpy 2.2.5\n",
      "Uninstalling numpy-2.2.5:\n",
      "  Successfully uninstalled numpy-2.2.5\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Failed to remove contents in a temporary directory 'C:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\~ensim'.\n",
      "You can safely remove it manually.\n",
      "WARNING: Failed to remove contents in a temporary directory 'C:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\~-mpy.libs'.\n",
      "You can safely remove it manually.\n",
      "WARNING: Failed to remove contents in a temporary directory 'C:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\~-mpy'.\n",
      "You can safely remove it manually.\n"
     ]
    }
   ],
   "source": [
    "pip uninstall gensim numpy -y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a864631e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing to C:\\Users\\孔子\\AppData\\Roaming\\pip\\pip.ini\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "23a49201",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing to C:\\Users\\孔子\\AppData\\Roaming\\pip\\pip.ini\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "\n",
    "pip config set install.trusted-host tuna.tsinghua.edu.cn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f3b6e93c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting numpy==1.23.5\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/42/38/775b43da55fa7473015eddc9a819571517d9a271a9f8134f68fb9be2f212/numpy-1.23.5.tar.gz (10.7 MB)\n",
      "     ---------------------------------------- 0.0/10.7 MB ? eta -:--:--\n",
      "     ------------------------- -------------- 6.8/10.7 MB 35.0 MB/s eta 0:00:01\n",
      "     --------------------------------------- 10.7/10.7 MB 27.9 MB/s eta 0:00:00\n",
      "  Installing build dependencies: started\n",
      "  Installing build dependencies: finished with status 'done'\n",
      "  Getting requirements to build wheel: started\n",
      "  Getting requirements to build wheel: finished with status 'done'\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ERROR: Exception:\n",
      "Traceback (most recent call last):\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\cli\\base_command.py\", line 105, in _run_wrapper\n",
      "    status = _inner_run()\n",
      "             ^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\cli\\base_command.py\", line 96, in _inner_run\n",
      "    return self.run(options, args)\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\cli\\req_command.py\", line 68, in wrapper\n",
      "    return func(self, options, args)\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\commands\\install.py\", line 387, in run\n",
      "    requirement_set = resolver.resolve(\n",
      "                      ^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\resolver.py\", line 96, in resolve\n",
      "    result = self._result = resolver.resolve(\n",
      "                            ^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_vendor\\resolvelib\\resolvers\\resolution.py\", line 515, in resolve\n",
      "    state = resolution.resolve(requirements, max_rounds=max_rounds)\n",
      "            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_vendor\\resolvelib\\resolvers\\resolution.py\", line 388, in resolve\n",
      "    self._add_to_criteria(self.state.criteria, r, parent=None)\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_vendor\\resolvelib\\resolvers\\resolution.py\", line 141, in _add_to_criteria\n",
      "    if not criterion.candidates:\n",
      "           ^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_vendor\\resolvelib\\structs.py\", line 194, in __bool__\n",
      "    return bool(self._sequence)\n",
      "           ^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\found_candidates.py\", line 163, in __bool__\n",
      "    self._bool = any(self)\n",
      "                 ^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\found_candidates.py\", line 147, in <genexpr>\n",
      "    return (c for c in iterator if id(c) not in self._incompatible_ids)\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\found_candidates.py\", line 37, in _iter_built\n",
      "    candidate = func()\n",
      "                ^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\factory.py\", line 187, in _make_candidate_from_link\n",
      "    base: Optional[BaseCandidate] = self._make_base_candidate_from_link(\n",
      "                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\factory.py\", line 233, in _make_base_candidate_from_link\n",
      "    self._link_candidate_cache[link] = LinkCandidate(\n",
      "                                       ^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\candidates.py\", line 306, in __init__\n",
      "    super().__init__(\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\candidates.py\", line 159, in __init__\n",
      "    self.dist = self._prepare()\n",
      "                ^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\candidates.py\", line 236, in _prepare\n",
      "    dist = self._prepare_distribution()\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\resolution\\resolvelib\\candidates.py\", line 317, in _prepare_distribution\n",
      "    return preparer.prepare_linked_requirement(self._ireq, parallel_builds=True)\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\operations\\prepare.py\", line 532, in prepare_linked_requirement\n",
      "    return self._prepare_linked_requirement(req, parallel_builds)\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\operations\\prepare.py\", line 647, in _prepare_linked_requirement\n",
      "    dist = _get_prepared_distribution(\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\operations\\prepare.py\", line 71, in _get_prepared_distribution\n",
      "    abstract_dist.prepare_distribution_metadata(\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\distributions\\sdist.py\", line 56, in prepare_distribution_metadata\n",
      "    self._install_build_reqs(finder)\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\distributions\\sdist.py\", line 126, in _install_build_reqs\n",
      "    build_reqs = self._get_build_requires_wheel()\n",
      "                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\distributions\\sdist.py\", line 103, in _get_build_requires_wheel\n",
      "    return backend.get_requires_for_build_wheel()\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_internal\\utils\\misc.py\", line 702, in get_requires_for_build_wheel\n",
      "    return super().get_requires_for_build_wheel(config_settings=cs)\n",
      "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_vendor\\pyproject_hooks\\_impl.py\", line 196, in get_requires_for_build_wheel\n",
      "    return self._call_hook(\n",
      "           ^^^^^^^^^^^^^^^^\n",
      "  File \"c:\\Users\\孔子\\Desktop\\社会网络舆情\\@Hands-on-NLP-main\\@Hands-on-NLP-main\\.venv\\Lib\\site-packages\\pip\\_vendor\\pyproject_hooks\\_impl.py\", line 402, in _call_hook\n",
      "    raise BackendUnavailable(\n",
      "pip._vendor.pyproject_hooks._impl.BackendUnavailable: Cannot import 'setuptools.build_meta'\n"
     ]
    }
   ],
   "source": [
    "pip install numpy==1.23.5 gensim==4.3.2 --no-cache-dir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "89ae3558",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting gensim\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/79/7b/747fcb06280764cf20353361162eff68c6b0a3be34c43ead5ae393d3b18e/gensim-4.3.3-cp312-cp312-win_amd64.whl (24.0 MB)\n",
      "     ---------------------------------------- 0.0/24.0 MB ? eta -:--:--\n",
      "     -------- ------------------------------- 5.2/24.0 MB 26.6 MB/s eta 0:00:01\n",
      "     ------------------ -------------------- 11.5/24.0 MB 30.1 MB/s eta 0:00:01\n",
      "     --------------------------- ----------- 17.0/24.0 MB 28.2 MB/s eta 0:00:01\n",
      "     --------------------------------------  23.9/24.0 MB 30.8 MB/s eta 0:00:01\n",
      "     --------------------------------------- 24.0/24.0 MB 29.2 MB/s eta 0:00:00\n",
      "Collecting numpy<2.0,>=1.18.5 (from gensim)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl (15.5 MB)\n",
      "     ---------------------------------------- 0.0/15.5 MB ? eta -:--:--\n",
      "     --------------------- ------------------ 8.4/15.5 MB 43.2 MB/s eta 0:00:01\n",
      "     --------------------------------------- 15.5/15.5 MB 39.0 MB/s eta 0:00:00\n",
      "Requirement already satisfied: scipy<1.14.0,>=1.7.0 in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from gensim) (1.13.1)\n",
      "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from gensim) (7.1.0)\n",
      "Requirement already satisfied: wrapt in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from smart-open>=1.8.1->gensim) (1.17.2)\n",
      "Installing collected packages: numpy, gensim\n",
      "\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   ---------------------------------------- 0/2 [numpy]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   -------------------- ------------------- 1/2 [gensim]\n",
      "   ---------------------------------------- 2/2 [gensim]\n",
      "\n",
      "Successfully installed gensim-4.3.3 numpy-1.26.4\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install gensim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5c5a740a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pprint\n",
    "\n",
    "from gensim.models import KeyedVectors\n",
    "\n",
    "# 从GloVe官网下载GloVe向量，此处使用的是glove.6B.zip\n",
    "# 解压缩zip文件并将以下路径改为解压后对应文件的路径\n",
    "model = KeyedVectors.load_word2vec_format('C:/Users/孔子/Desktop/社会网络舆情/@Hands-on-NLP-main/@Hands-on-NLP-main/glove.6B.100d.txt', binary=False, no_header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "01a2e4a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('movie', 0.9055121541023254),\n",
      " ('films', 0.8914433717727661),\n",
      " ('directed', 0.8124362826347351),\n",
      " ('documentary', 0.8075793981552124),\n",
      " ('drama', 0.7929168939590454),\n",
      " ('movies', 0.7889865040779114),\n",
      " ('comedy', 0.7842751145362854),\n",
      " ('starring', 0.7573285102844238),\n",
      " ('cinema', 0.7419455647468567),\n",
      " ('hollywood', 0.7307389974594116)]\n",
      "[('vehicle', 0.8630837798118591),\n",
      " ('truck', 0.8597878813743591),\n",
      " ('cars', 0.837166965007782),\n",
      " ('driver', 0.8185911178588867),\n",
      " ('driving', 0.781263530254364),\n",
      " ('motorcycle', 0.7553156614303589),\n",
      " ('vehicles', 0.7462257146835327),\n",
      " ('parked', 0.74594646692276),\n",
      " ('bus', 0.737270712852478),\n",
      " ('taxi', 0.7155269384384155)]\n"
     ]
    }
   ],
   "source": [
    "# 使用most_similar()找到词表中距离给定词最近（最相似）的n个词\n",
    "pprint.pprint(model.most_similar('film'))\n",
    "pprint.pprint(model.most_similar('car'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8b62f7ad",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "japanese\n",
      "panda\n",
      "longest\n",
      "terrible\n",
      "queen\n"
     ]
    }
   ],
   "source": [
    "# 利用GloVe展示一个类比的例子\n",
    "def analogy(x1, x2, y1):\n",
    "    # 寻找top-N最相似的词。\n",
    "    result = model.most_similar(positive=[y1, x2], negative=[x1])\n",
    "    return result[0][0]\n",
    "\n",
    "print(analogy('china', 'chinese', 'japan'))\n",
    "print(analogy('australia', 'koala', 'china'))\n",
    "print(analogy('tall', 'tallest', 'long'))\n",
    "print(analogy('good', 'fantastic', 'bad'))\n",
    "print(analogy('man', 'woman', 'king'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c308cee",
   "metadata": {},
   "source": [
    "下面将展示word2vec的代码，包括文本预处理、skipgram算法的实现、以及使用PyTorch进行优化。这里使用《小王子》这本书作为训练语料。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "590fc408",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 安装NLTK，使用如下代码下载punkt组件\n",
    "#import nltk\n",
    "#nltk.download('punkt')\n",
    "\n",
    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
    "from collections import defaultdict\n",
    "\n",
    "# 使用类管理数据对象，包括文本读取、文本预处理等\n",
    "class TheLittlePrinceDataset:\n",
    "    def __init__(self, tokenize=True):\n",
    "        # 利用NLTK函数进行分句和分词\n",
    "        text = open('the little prince.txt', 'r', encoding='utf-8').read()\n",
    "        if tokenize:\n",
    "            self.sentences = sent_tokenize(text.lower())\n",
    "            self.tokens = [word_tokenize(sent) for sent in self.sentences]\n",
    "        else:\n",
    "            self.text = text\n",
    "\n",
    "    def build_vocab(self, min_freq=1):\n",
    "        # 统计词频\n",
    "        frequency = defaultdict(int)\n",
    "        for sentence in self.tokens:\n",
    "            for token in sentence:\n",
    "                frequency[token] += 1\n",
    "        self.frequency = frequency\n",
    "\n",
    "        # 加入<unk>处理未登录词，加入<pad>用于对齐变长输入进而加速\n",
    "        self.token2id = {'<unk>': 1, '<pad>': 0}\n",
    "        self.id2token = {1: '<unk>', 0: '<pad>'}\n",
    "        for token, freq in sorted(frequency.items(), key=lambda x: -x[1]):\n",
    "            # 丢弃低频词\n",
    "            if freq > min_freq:\n",
    "                self.token2id[token] = len(self.token2id)\n",
    "                self.id2token[len(self.id2token)] = token\n",
    "            else:\n",
    "                break\n",
    "\n",
    "    def get_word_distribution(self):\n",
    "        distribution = np.zeros(vocab_size)\n",
    "        for token, freq in self.frequency.items():\n",
    "            if token in dataset.token2id:\n",
    "                distribution[dataset.token2id[token]] = freq\n",
    "            else:\n",
    "                # 不在词表中的词按<unk>计算\n",
    "                distribution[1] += freq\n",
    "        distribution /= distribution.sum()\n",
    "        return distribution\n",
    "\n",
    "    # 将分词结果转化为索引表示\n",
    "    def convert_tokens_to_ids(self, drop_single_word=True):\n",
    "        self.token_ids = []\n",
    "        for sentence in self.tokens:\n",
    "            token_ids = [self.token2id.get(token, 1) for token in sentence]\n",
    "            # 忽略只有一个token的序列，无法计算loss\n",
    "            if len(token_ids) == 1 and drop_single_word:\n",
    "                continue\n",
    "            self.token_ids.append(token_ids)\n",
    "        \n",
    "        return self.token_ids\n",
    "\n",
    "dataset = TheLittlePrinceDataset()\n",
    "dataset.build_vocab(min_freq=1)\n",
    "sentences = dataset.convert_tokens_to_ids()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "efc882de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(76044, 2) [[  4  16]\n",
      " [  4  19]\n",
      " [ 16   4]\n",
      " ...\n",
      " [130   3]\n",
      " [  3  86]\n",
      " [  3 130]]\n"
     ]
    }
   ],
   "source": [
    "# 遍历所有的中心词-上下文词对\n",
    "window_size = 2\n",
    "data = []\n",
    "\n",
    "for sentence in sentences:\n",
    "    for i in range(len(sentence)):\n",
    "        for j in range(i-window_size, i+window_size+1):\n",
    "            if j == i or j < 0 or j >= len(sentence):\n",
    "                continue\n",
    "            center_word = sentence[i]\n",
    "            context_word = sentence[j]\n",
    "            data.append([center_word, context_word])\n",
    "\n",
    "# 需要提前安装numpy\n",
    "import numpy as np\n",
    "data = np.array(data)\n",
    "print(data.shape, data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f96b02ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting torch\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/44/80/b353c024e6b624cd9ce1d66dcb9d24e0294680f95b369f19280e241a0159/torch-2.7.0-cp312-cp312-win_amd64.whl (212.5 MB)\n",
      "     ---------------------------------------- 0.0/212.5 MB ? eta -:--:--\n",
      "     ---------------------------------------- 0.5/212.5 MB 3.4 MB/s eta 0:01:04\n",
      "     ---------------------------------------- 1.6/212.5 MB 4.2 MB/s eta 0:00:51\n",
      "      --------------------------------------- 4.2/212.5 MB 7.9 MB/s eta 0:00:27\n",
      "     - -------------------------------------- 6.3/212.5 MB 8.6 MB/s eta 0:00:25\n",
      "     - -------------------------------------- 8.4/212.5 MB 9.0 MB/s eta 0:00:23\n",
      "     - ------------------------------------- 10.2/212.5 MB 9.0 MB/s eta 0:00:23\n",
      "     -- ------------------------------------ 12.3/212.5 MB 9.1 MB/s eta 0:00:23\n",
      "     -- ------------------------------------ 14.2/212.5 MB 9.3 MB/s eta 0:00:22\n",
      "     --- ----------------------------------- 16.8/212.5 MB 9.6 MB/s eta 0:00:21\n",
      "     --- ---------------------------------- 19.4/212.5 MB 10.0 MB/s eta 0:00:20\n",
      "     --- ---------------------------------- 21.8/212.5 MB 10.1 MB/s eta 0:00:19\n",
      "     ---- --------------------------------- 24.4/212.5 MB 10.4 MB/s eta 0:00:19\n",
      "     ---- --------------------------------- 26.7/212.5 MB 10.5 MB/s eta 0:00:18\n",
      "     ----- -------------------------------- 29.4/212.5 MB 10.6 MB/s eta 0:00:18\n",
      "     ----- -------------------------------- 32.0/212.5 MB 10.9 MB/s eta 0:00:17\n",
      "     ------ ------------------------------- 34.9/212.5 MB 11.0 MB/s eta 0:00:17\n",
      "     ------ ------------------------------- 37.7/212.5 MB 11.3 MB/s eta 0:00:16\n",
      "     ------- ------------------------------ 40.6/212.5 MB 11.4 MB/s eta 0:00:16\n",
      "     ------- ------------------------------ 43.3/212.5 MB 11.6 MB/s eta 0:00:15\n",
      "     ------- ------------------------------ 43.3/212.5 MB 11.6 MB/s eta 0:00:15\n",
      "     ------- ------------------------------ 43.3/212.5 MB 11.6 MB/s eta 0:00:15\n",
      "     ------- ------------------------------ 44.0/212.5 MB 10.1 MB/s eta 0:00:17\n",
      "     -------- ----------------------------- 47.2/212.5 MB 10.3 MB/s eta 0:00:17\n",
      "     -------- ----------------------------- 50.1/212.5 MB 10.5 MB/s eta 0:00:16\n",
      "     --------- ---------------------------- 52.7/212.5 MB 10.6 MB/s eta 0:00:16\n",
      "     --------- ---------------------------- 55.6/212.5 MB 10.8 MB/s eta 0:00:15\n",
      "     ---------- --------------------------- 58.2/212.5 MB 10.8 MB/s eta 0:00:15\n",
      "     ---------- --------------------------- 60.8/212.5 MB 10.9 MB/s eta 0:00:14\n",
      "     ----------- -------------------------- 64.2/212.5 MB 11.1 MB/s eta 0:00:14\n",
      "     ------------ ------------------------- 67.1/212.5 MB 11.3 MB/s eta 0:00:13\n",
      "     ------------ ------------------------- 70.0/212.5 MB 11.4 MB/s eta 0:00:13\n",
      "     ------------- ------------------------ 73.1/212.5 MB 11.5 MB/s eta 0:00:13\n",
      "     ------------- ------------------------ 76.0/212.5 MB 11.6 MB/s eta 0:00:12\n",
      "     -------------- ----------------------- 79.2/212.5 MB 11.7 MB/s eta 0:00:12\n",
      "     -------------- ----------------------- 81.5/212.5 MB 11.7 MB/s eta 0:00:12\n",
      "     -------------- ----------------------- 83.6/212.5 MB 11.6 MB/s eta 0:00:12\n",
      "     --------------- ---------------------- 86.0/212.5 MB 11.6 MB/s eta 0:00:11\n",
      "     --------------- ---------------------- 87.6/212.5 MB 11.6 MB/s eta 0:00:11\n",
      "     ---------------- --------------------- 89.7/212.5 MB 11.5 MB/s eta 0:00:11\n",
      "     ---------------- --------------------- 91.5/212.5 MB 11.4 MB/s eta 0:00:11\n",
      "     ---------------- --------------------- 93.3/212.5 MB 11.4 MB/s eta 0:00:11\n",
      "     ----------------- -------------------- 95.2/212.5 MB 11.4 MB/s eta 0:00:11\n",
      "     ----------------- -------------------- 97.5/212.5 MB 11.4 MB/s eta 0:00:11\n",
      "     ----------------- -------------------- 99.9/212.5 MB 11.4 MB/s eta 0:00:10\n",
      "     ----------------- ------------------- 102.2/212.5 MB 11.4 MB/s eta 0:00:10\n",
      "     ------------------ ------------------ 104.9/212.5 MB 11.4 MB/s eta 0:00:10\n",
      "     ------------------ ------------------ 106.2/212.5 MB 11.4 MB/s eta 0:00:10\n",
      "     ------------------ ------------------ 109.1/212.5 MB 11.4 MB/s eta 0:00:10\n",
      "     ------------------- ----------------- 111.7/212.5 MB 11.4 MB/s eta 0:00:09\n",
      "     ------------------- ----------------- 114.6/212.5 MB 11.5 MB/s eta 0:00:09\n",
      "     -------------------- ---------------- 117.4/212.5 MB 11.5 MB/s eta 0:00:09\n",
      "     -------------------- ---------------- 119.0/212.5 MB 11.5 MB/s eta 0:00:09\n",
      "     -------------------- ---------------- 119.0/212.5 MB 11.5 MB/s eta 0:00:09\n",
      "     --------------------- --------------- 121.6/212.5 MB 11.3 MB/s eta 0:00:09\n",
      "     --------------------- --------------- 123.7/212.5 MB 11.2 MB/s eta 0:00:08\n",
      "     --------------------- --------------- 125.6/212.5 MB 11.2 MB/s eta 0:00:08\n",
      "     ---------------------- -------------- 127.7/212.5 MB 11.2 MB/s eta 0:00:08\n",
      "     ---------------------- -------------- 129.5/212.5 MB 11.1 MB/s eta 0:00:08\n",
      "     ---------------------- -------------- 131.3/212.5 MB 11.1 MB/s eta 0:00:08\n",
      "     ----------------------- ------------- 133.4/212.5 MB 11.1 MB/s eta 0:00:08\n",
      "     ----------------------- ------------- 134.2/212.5 MB 11.1 MB/s eta 0:00:08\n",
      "     ----------------------- ------------- 137.4/212.5 MB 11.1 MB/s eta 0:00:07\n",
      "     ------------------------ ------------ 139.5/212.5 MB 11.1 MB/s eta 0:00:07\n",
      "     ------------------------ ------------ 141.8/212.5 MB 11.1 MB/s eta 0:00:07\n",
      "     ------------------------- ----------- 143.7/212.5 MB 11.1 MB/s eta 0:00:07\n",
      "     ------------------------- ----------- 146.5/212.5 MB 11.1 MB/s eta 0:00:06\n",
      "     ------------------------- ----------- 148.9/212.5 MB 11.1 MB/s eta 0:00:06\n",
      "     -------------------------- ---------- 151.8/212.5 MB 11.1 MB/s eta 0:00:06\n",
      "     -------------------------- ---------- 154.7/212.5 MB 11.2 MB/s eta 0:00:06\n",
      "     --------------------------- --------- 157.8/212.5 MB 11.2 MB/s eta 0:00:05\n",
      "     --------------------------- --------- 160.7/212.5 MB 11.3 MB/s eta 0:00:05\n",
      "     ---------------------------- -------- 163.6/212.5 MB 11.3 MB/s eta 0:00:05\n",
      "     ---------------------------- -------- 166.5/212.5 MB 11.4 MB/s eta 0:00:05\n",
      "     ----------------------------- ------- 169.3/212.5 MB 11.4 MB/s eta 0:00:04\n",
      "     ----------------------------- ------- 172.2/212.5 MB 11.5 MB/s eta 0:00:04\n",
      "     ------------------------------ ------ 174.3/212.5 MB 11.4 MB/s eta 0:00:04\n",
      "     ------------------------------ ------ 176.4/212.5 MB 11.4 MB/s eta 0:00:04\n",
      "     ------------------------------- ----- 178.3/212.5 MB 11.4 MB/s eta 0:00:04\n",
      "     ------------------------------- ----- 180.1/212.5 MB 11.4 MB/s eta 0:00:03\n",
      "     ------------------------------- ----- 181.9/212.5 MB 11.3 MB/s eta 0:00:03\n",
      "     ------------------------------- ----- 183.8/212.5 MB 11.3 MB/s eta 0:00:03\n",
      "     -------------------------------- ---- 186.1/212.5 MB 11.3 MB/s eta 0:00:03\n",
      "     -------------------------------- ---- 188.2/212.5 MB 11.3 MB/s eta 0:00:03\n",
      "     --------------------------------- --- 190.1/212.5 MB 11.3 MB/s eta 0:00:02\n",
      "     --------------------------------- --- 192.7/212.5 MB 11.3 MB/s eta 0:00:02\n",
      "     ---------------------------------- -- 195.3/212.5 MB 11.3 MB/s eta 0:00:02\n",
      "     ---------------------------------- -- 198.2/212.5 MB 11.3 MB/s eta 0:00:02\n",
      "     ----------------------------------- - 201.3/212.5 MB 11.4 MB/s eta 0:00:01\n",
      "     ----------------------------------- - 204.5/212.5 MB 11.4 MB/s eta 0:00:01\n",
      "     ------------------------------------  207.1/212.5 MB 11.5 MB/s eta 0:00:01\n",
      "     ------------------------------------  208.7/212.5 MB 11.5 MB/s eta 0:00:01\n",
      "     ------------------------------------  212.3/212.5 MB 11.5 MB/s eta 0:00:01\n",
      "     ------------------------------------  212.3/212.5 MB 11.5 MB/s eta 0:00:01\n",
      "     ------------------------------------- 212.5/212.5 MB 11.4 MB/s eta 0:00:00\n",
      "Collecting filelock (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl (16 kB)\n",
      "Collecting typing-extensions>=4.10.0 (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl (45 kB)\n",
      "Collecting sympy>=1.13.3 (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl (6.3 MB)\n",
      "     ---------------------------------------- 0.0/6.3 MB ? eta -:--:--\n",
      "     ------------------- -------------------- 3.1/6.3 MB 18.5 MB/s eta 0:00:01\n",
      "     ---------------------------------------  6.3/6.3 MB 16.1 MB/s eta 0:00:01\n",
      "     ---------------------------------------- 6.3/6.3 MB 15.5 MB/s eta 0:00:00\n",
      "Collecting networkx (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl (1.7 MB)\n",
      "     ---------------------------------------- 0.0/1.7 MB ? eta -:--:--\n",
      "     ---------------------------------------- 1.7/1.7 MB 11.7 MB/s eta 0:00:00\n",
      "Collecting jinja2 (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl (134 kB)\n",
      "Collecting fsspec (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/44/4b/e0cfc1a6f17e990f3e64b7d941ddc4acdc7b19d6edd51abf495f32b1a9e4/fsspec-2025.3.2-py3-none-any.whl (194 kB)\n",
      "Collecting setuptools (from torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/53/7e/5d8af3317ddbf9519b687bd1c39d8737fde07d97f54df65553faca5cffb1/setuptools-80.3.1-py3-none-any.whl (1.2 MB)\n",
      "     ---------------------------------------- 0.0/1.2 MB ? eta -:--:--\n",
      "     ---------------------------------------- 1.2/1.2 MB 15.1 MB/s eta 0:00:00\n",
      "Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
      "     ---------------------------------------- 0.0/536.2 kB ? eta -:--:--\n",
      "     -------------------------------------- 536.2/536.2 kB 8.8 MB/s eta 0:00:00\n",
      "Collecting MarkupSafe>=2.0 (from jinja2->torch)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl (15 kB)\n",
      "Installing collected packages: mpmath, typing-extensions, sympy, setuptools, networkx, MarkupSafe, fsspec, filelock, jinja2, torch\n",
      "\n",
      "   ----------------------------------------  0/10 [mpmath]\n",
      "   ----------------------------------------  0/10 [mpmath]\n",
      "   ----------------------------------------  0/10 [mpmath]\n",
      "   ----------------------------------------  0/10 [mpmath]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   -------- -------------------------------  2/10 [sympy]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ------------ ---------------------------  3/10 [setuptools]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ---------------- -----------------------  4/10 [networkx]\n",
      "   ------------------------ ---------------  6/10 [fsspec]\n",
      "   ------------------------ ---------------  6/10 [fsspec]\n",
      "   ---------------------------- -----------  7/10 [filelock]\n",
      "   -------------------------------- -------  8/10 [jinja2]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ------------------------------------ ---  9/10 [torch]\n",
      "   ---------------------------------------- 10/10 [torch]\n",
      "\n",
      "Successfully installed MarkupSafe-3.0.2 filelock-3.18.0 fsspec-2025.3.2 jinja2-3.1.6 mpmath-1.3.0 networkx-3.4.2 setuptools-80.3.1 sympy-1.14.0 torch-2.7.0 typing-extensions-4.13.2\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# Use the %pip magic so the package is installed into the kernel's own environment\n",
    "%pip install torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "30903b3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Requires PyTorch (and NumPy) to be installed beforehand\n",
    "import numpy as np  # fix: np was used below but never imported in this cell\n",
    "import torch\n",
    "from torch import nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "# Skip-gram with negative sampling: positive (center, context) pairs are\n",
    "# contrasted against randomly drawn negative words.\n",
    "class SkipGramNCE(nn.Module):\n",
    "    def __init__(self, vocab_size, embed_size, distribution,\n",
    "                 neg_samples=20):\n",
    "        \"\"\"vocab_size: vocabulary size; embed_size: embedding dimension;\n",
    "        distribution: unigram word frequencies used for negative sampling;\n",
    "        neg_samples: number of negatives drawn per positive pair.\"\"\"\n",
    "        super().__init__()\n",
    "        print(f'vocab_size = {vocab_size}, embed_size = {embed_size}, '\n",
    "              f'neg_samples = {neg_samples}')\n",
    "        self.input_embeddings = nn.Embedding(vocab_size, embed_size)\n",
    "        self.output_embeddings = nn.Embedding(vocab_size, embed_size)\n",
    "        # word2vec heuristic: raise unigram frequencies to the 3/4 power\n",
    "        # so rare words are sampled as negatives somewhat more often\n",
    "        distribution = np.power(distribution, 0.75)\n",
    "        distribution /= distribution.sum()\n",
    "        self.distribution = torch.tensor(distribution)\n",
    "        self.neg_samples = neg_samples\n",
    "\n",
    "    def forward(self, input_ids, labels):\n",
    "        \"\"\"Return the mean negative-sampling loss for center words\n",
    "        `input_ids` and their context words `labels` (1-D LongTensors).\"\"\"\n",
    "        i_embed = self.input_embeddings(input_ids)\n",
    "        o_embed = self.output_embeddings(labels)\n",
    "        batch_size = i_embed.size(0)\n",
    "        # draw neg_samples negative word ids per example\n",
    "        n_words = torch.multinomial(self.distribution,\n",
    "            batch_size * self.neg_samples,\n",
    "            replacement=True).view(batch_size, -1)\n",
    "        n_embed = self.output_embeddings(n_words)\n",
    "        pos_term = F.logsigmoid(torch.sum(i_embed * o_embed, dim=1))\n",
    "        # negative-sampling term; squeeze(-1) (not a bare squeeze()) so\n",
    "        # the batch dimension survives when batch_size == 1\n",
    "        neg_term = F.logsigmoid(-torch.bmm(n_embed,\n",
    "            i_embed.unsqueeze(2)).squeeze(-1))\n",
    "        neg_term = torch.sum(neg_term, dim=1)\n",
    "        loss = -torch.mean(pos_term + neg_term)\n",
    "        return loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "1f3e1318",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Collecting matplotlib\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a6/b6/a9405484fb40746fdc6ae4502b16a9d6e53282ba5baaf9ebe2da579f68c4/matplotlib-3.10.1-cp312-cp312-win_amd64.whl (8.1 MB)\n",
      "     ---------------------------------------- 0.0/8.1 MB ? eta -:--:--\n",
      "     ----- ---------------------------------- 1.0/8.1 MB 5.6 MB/s eta 0:00:02\n",
      "     ------------------- -------------------- 3.9/8.1 MB 10.2 MB/s eta 0:00:01\n",
      "     ------------------------ --------------- 5.0/8.1 MB 10.1 MB/s eta 0:00:01\n",
      "     ---------------------------------------  7.9/8.1 MB 10.1 MB/s eta 0:00:01\n",
      "     ---------------------------------------- 8.1/8.1 MB 9.8 MB/s eta 0:00:00\n",
      "Collecting contourpy>=1.0.1 (from matplotlib)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/01/c8/fadd0b92ffa7b5eb5949bf340a63a4a496a6930a6c37a7ba0f12acb076d6/contourpy-1.3.2-cp312-cp312-win_amd64.whl (223 kB)\n",
      "Collecting cycler>=0.10 (from matplotlib)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
      "Collecting fonttools>=4.22.0 (from matplotlib)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/06/5d/1be5424bb305880e1113631f49a55ea7c7da3a5fe02608ca7c16a03a21da/fonttools-4.57.0-cp312-cp312-win_amd64.whl (2.2 MB)\n",
      "     ---------------------------------------- 0.0/2.2 MB ? eta -:--:--\n",
      "     ---------------------------------------- 2.2/2.2 MB 17.6 MB/s eta 0:00:00\n",
      "Collecting kiwisolver>=1.3.1 (from matplotlib)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/bd/72/dfff0cc97f2a0776e1c9eb5bef1ddfd45f46246c6533b0191887a427bca5/kiwisolver-1.4.8-cp312-cp312-win_amd64.whl (71 kB)\n",
      "Requirement already satisfied: numpy>=1.23 in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from matplotlib) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from matplotlib) (25.0)\n",
      "Collecting pillow>=8 (from matplotlib)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7c/18/24bff2ad716257fc03da964c5e8f05d9790a779a8895d6566e493ccf0189/pillow-11.2.1-cp312-cp312-win_amd64.whl (2.7 MB)\n",
      "     ---------------------------------------- 0.0/2.7 MB ? eta -:--:--\n",
      "     ---------------------------------------- 2.7/2.7 MB 31.0 MB/s eta 0:00:00\n",
      "Collecting pyparsing>=2.3.1 (from matplotlib)\n",
      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl (111 kB)\n",
      "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from matplotlib) (2.9.0.post0)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\孔子\\desktop\\社会网络舆情\\@hands-on-nlp-main\\@hands-on-nlp-main\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n",
      "Installing collected packages: pyparsing, pillow, kiwisolver, fonttools, cycler, contourpy, matplotlib\n",
      "\n",
      "   ----- ---------------------------------- 1/7 [pillow]\n",
      "   ----- ---------------------------------- 1/7 [pillow]\n",
      "   ----- ---------------------------------- 1/7 [pillow]\n",
      "   ----- ---------------------------------- 1/7 [pillow]\n",
      "   ----- ---------------------------------- 1/7 [pillow]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ----------------- ---------------------- 3/7 [fonttools]\n",
      "   ---------------------------- ----------- 5/7 [contourpy]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------- ----- 6/7 [matplotlib]\n",
      "   ---------------------------------------- 7/7 [matplotlib]\n",
      "\n",
      "Successfully installed contourpy-1.3.2 cycler-0.12.1 fonttools-4.57.0 kiwisolver-1.4.8 matplotlib-3.10.1 pillow-11.2.1 pyparsing-3.2.3\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d9da6c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00000000e+00 5.43983724e-02 5.34295679e-02 ... 9.68804495e-05\n",
      " 9.68804495e-05 9.68804495e-05]\n",
      "vocab_size = 1078, embed_size = 128, neg_samples = 20\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "epoch-99, loss=1.9931: 100%|█| 100/100 [04:25<00:00,  2.66s/\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAGwCAYAAACzXI8XAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAQcNJREFUeJzt3Qd8VFX6//EnPYGQhCQSWuggTYr0YllEAV0WhV0F0UVE2XURBdy17K66/leFn65iQyyrYEdZFQUVCwKK0psUpUgVSAKBVEif/+s5yYwZSEJI7sydST7v12uYmsnNZWbud855zjkBDofDIQAAAH4o0O4NAAAAqCqCDAAA8FsEGQAA4LcIMgAAwG8RZAAAgN8iyAAAAL9FkAEAAH4rWGq4oqIiOXz4sNSrV08CAgLs3hwAAFAJOs1dZmamNG7cWAIDA2tvkNEQk5iYaPdmAACAKjh48KA0bdq09gYZbYlx7oioqCi7NwcAAFRCRkaGaYhwHsdrbZBxdidpiCHIAADgX85WFkKxLwAA8FsEGQAA4LcIMgAAwG8RZAAAgN8iyAAAAL9FkAEAAH6LIAMAAPwWQQYAAPgtggwAAPBbBBkAAOC3CDIAAMBvEWQAAIDfqvGLRnpKRk6+pJ/Ml3rhwRJTJ9TuzQEAoFaiRaaKHl60XS56bKm8tfqA3ZsCAECtRZCpojqhxY1Zp/IK7d4UAABqLYJMFYWHBJnzU/kEGQAA7EKQqaIIggwAALYjyFRRRGjxrsuhawkAANsQZKqIFhkAAOxHkKmiiJJi35O0yAAAYBuCTBXRIgMAgP0IMtWtkSHIAABgG4JMdYdf07UEAIBtCDJVRNcSAAD2I8hUETP7AgBgP4JMFdEiAwCA/QgyVRReUuyrQcbhcNi9OQAA1EoEmWq2yGiGyS0osntzAAColQgy1QwyijoZAADsQZCpouCgQAkN+rV7CQAAeB9BphrCQwgyAADYiSBTDRGhTIoHAICdCDIW1MmwTAEAAPYgyFQDK2ADAGAvgkw1RFAjAwCArQgyFtTI0LUEAIA9CDJWLFNA1xIAALYgyFRDOOstAQBgK4JMNdQp6Vqi2BcAAHsQZKqB4dcAANiLIFMN4UyIBwCArQgyVhT70iIDAIAtCDIW1MgQZAAAsAdBphoYfg0AgL0IMtXA8GsAAOxFkKkGVr8GAMBeBJlqYPg1AAD2IshY0CLDhHgAANiDIFMNDL8GAMBeBJlqYPVrAADsRZCpBoZfAwBQi4PMv/71LwkICHA7tW/f3nV/Tk6OTJo0SeLi4iQyMlJGjRolycnJ4nOjlvILxeFw2L05AADUOra3yHTq1EmOHDniOq1YscJ139SpU2XhwoUyf/58Wb58uRw+fFhGjhwpvtYiU+QQyS0osntzAACodYJt34DgYGnYsOEZt6enp8srr7wib7/9tgwaNMjcNmfOHOnQoYOsWrVK+vbtK74yIZ6zTqb0dQAAUAtaZHbt2iWNGzeWVq1aydixY+XAgQPm9vXr10t+fr4MHjzY9VjtdmrWrJmsXLmy3OfLzc2VjIwMt5OnhAQFSkhQgLnMyCUAAGpZkOnTp4/MnTtXFi9eLLNnz5a9e/fKRRddJJmZmZKUlCShoaESExPj9jMJCQnmvvJMnz5doqOjXafExETvLFNAwS8AALWra2nYsGGuy126dDHBpnnz5vLee+9JRERElZ7zvvvuk2nTprmua4uMJ8OMroCdmVNAiwwAALWxa6k0bX1p166d7N6929TN5OXlSVpamttjdNRSWTU1TmFhYRIVFeV28iSGYAMAYB+fCjJZWVny888/S6NGjaRHjx4SEhIiS5Yscd2/Y8cOU0PTr18/8RWsgA0AQC3tWvrrX/8qw4cPN91JOrT6wQcflKCgIBkzZoypb5kwYYLpJoqNjTUtK5MnTzYhxhdGLDmxAjYAALU0yPzyyy8mtKSmpsp5550nAwcONEOr9bKaOXOmBAYG
monwdDTSkCFD5PnnnxdfwnpLAADU0iAzb968Cu8PDw+XWbNmmZOv0mJfRYsMAAC1vEbGH1EjAwCAfQgy1UTXEgAA9iHIWFTsm0PXEgAAXkeQsXAFbAAA4F0EGYu6lk7SIgMAgNcRZKqJGhkAAOxDkLGqRoYgAwCA1xFkqonVrwEAsA9BxqoJ8WiRAQDA6wgy1cTq1wAA2IcgU00U+wIAYB+CTDWF07UEAIBtCDKWdS0V2b0pAADUOgQZi4p9GX4NAID3EWQsm9m3QBwOh92bAwBArUKQsahGpsghkldI9xIAAN5EkLGoRUblUCcDAIBXEWSqKSQoUEKCAsxlRi4BAOBdBBkLlynQOhkAAOA9BBkLMCkeAAD2IMhYgBWwAQCwB0HGAkyKBwCAPQgyFrbI0LUEAIB3EWQsnhQPAAB4D0HGwiBDjQwAAN5FkLFyBew8ggwAAN5EkLFAHdfwa4p9AQDwJoKMBSj2BQDAHgQZS4dfU+wLAIA3EWQsXKKAFhkAALyLIGNl1xIT4gEA4FUEGQvUYYkCAABsQZCxAKtfAwBgD4KMBVj9GgAAexBkLA0y1MgAAOBNBBkLi31zmNkXAACvIshYgAnxAACwB0HG0tWvCTIAAHgTQcYCrH4NAIA9CDIWdy05HA67NwcAgFqDIGNhkCksckh+IUEGAABvIchY2LWkKPgFAMB7CDIWCAkKlODAAHP5FAW/AAB4DUHGIszuCwCA9xFkLBLuWgGbIAMAgLcQZCxeAZsWGQAAvIcgYxHmkgEAwPsIMhYJZ3ZfAAC8jiBjEYp9AQDwPoKMRVgBGwAA7yPIWIQVsAEAqMVBZsaMGRIQECBTpkxx3ZaTkyOTJk2SuLg4iYyMlFGjRklycrL4IlbABgCglgaZtWvXyosvvihdunRxu33q1KmycOFCmT9/vixfvlwOHz4sI0eOFF9EjQwAALUwyGRlZcnYsWPl5Zdflvr167tuT09Pl1deeUWefPJJGTRokPTo0UPmzJkj33//vaxatUp8tkaGIAMAQO0JMtp1dNVVV8ngwYPdbl+/fr3k5+e73d6+fXtp1qyZrFy5stzny83NlYyMDLeTV1tk6FoCAMBrgsVG8+bNkw0bNpiupdMlJSVJaGioxMTEuN2ekJBg7ivP9OnT5aGHHhJvo9gXAIBa1CJz8OBBufPOO+Wtt96S8PBwy573vvvuM91SzpP+Hm+gRQYAgFoUZLTrKCUlRS688EIJDg42Jy3ofeaZZ8xlbXnJy8uTtLQ0t5/TUUsNGzYs93nDwsIkKirK7eQNFPsCAFCLupYuu+wy2bJli9tt48ePN3Uw99xzjyQmJkpISIgsWbLEDLtWO3bskAMHDki/fv3E17D6NQAAtSjI1KtXTzp37ux2W926dc2cMc7bJ0yYINOmTZPY2FjTsjJ58mQTYvr27Su+pg4tMgAA1K5i37OZOXOmBAYGmhYZHY00ZMgQef7558UXMfwaAIBaHmSWLVvmdl2LgGfNmmVOvo7VrwEAqIXzyNQUFPsCAOB9BBmLsPo1AADeR5CxSB0mxAMAwOsIMhbXyBQUOSSvoMjuzQEAoFYgyFikbmiQBAQUX87Iybd7cwAAqBUIMhYJDgqU+nVCzeXUrDy7NwcAgFqBIGOhuLrOIJNr96YAAFArEGQsFBdZHGSOZdMiAwCANxBkLBQXGWbOaZEBAMA7CDIWind1LdEiAwCANxBkPNEik02LDAAA3kCQ8USNDC0yAAB4BUHGQnF1i1tkjlEjAwCAVxBkLHRePWpkAADwJoKMB1pkGLUEAIB3EGQ8UCOTnVcop1gFGwAAjyPIWCgyLFhCg4t3KSOXAADwPIKMhQICAphLBgAALyLIWIy5ZAAA8B6CjMWYSwYAAO8hyFiMuWQAAPAegozF4ktaZKiRAQDA8wgyFotnBWwAALyGIOOhGpnUbFpkAADwNIKMh0YtUewLAIDnEWQsFueaR4auJQAAPI0g46EamePZeVJU5LB7cwAA
qNEIMhaLLWmRKShySEZOvt2bAwBAjUaQsZiutRQVHmwuM5cMAACeRZDxYPcSBb8AAHgWQcaTQ7AJMgAAeBRBxpOT4rFwJAAAHkWQ8QAWjgQAwDsIMh5cOJK5ZAAA8CyCjAewcCQAAN5BkPHgMgXUyAAA4FkEGQ8uU0CNDAAAnkWQ8ejCkbTIAADgSQQZD9bIZOYUSG5Bod2bAwBAjUWQ8YCo8BAJDgxwLR4JAAA8gyDjAYGBAczuCwCAFxBkPDyXDHUyAAB4DkHGQ2iRAQDA8wgyHsJ6SwAAeB5BxkOYSwYAAM8jyHgIc8kAAOB5BBkPoUYGAADPI8h4euFIamQAAPAYgoyni31pkQEAwLeCzGuvvSaffPKJ6/rdd98tMTEx0r9/f9m/f7+V2+f/K2Bn5YnD4bB7cwAAqJGqFGQeffRRiYiIMJdXrlwps2bNkscee0zi4+Nl6tSplX6e2bNnS5cuXSQqKsqc+vXrJ5999pnr/pycHJk0aZLExcVJZGSkjBo1SpKTk8WfRi3lFRZJZm6B3ZsDAECNVKUgc/DgQWnTpo25vGDBAhMwJk6cKNOnT5dvv/220s/TtGlTmTFjhqxfv17WrVsngwYNkhEjRsi2bdvM/RqKFi5cKPPnz5fly5fL4cOHZeTIkeIPwkOCJDIs2FymewkAAB8KMto6kpqaai5/8cUXcvnll5vL4eHhcurUqUo/z/Dhw+XKK6+Utm3bSrt27eSRRx4xz71q1SpJT0+XV155RZ588kkTcHr06CFz5syR77//3tzvTyOXGIINAIBnFDcZnCMNLrfccot0795ddu7cacKI0paUFi1aVGlDCgsLTctLdna26WLSVpr8/HwZPHiw6zHt27eXZs2ame6svn37lvk8ubm55uSUkZEhdnYv7U89KakEGQAAfKdFRmtiNGwcPXpU3n//fVPDojR8jBkz5pyea8uWLaYVJiwsTP785z/Lhx9+KB07dpSkpCQJDQ01RcSlJSQkmPvKo91b0dHRrlNiYqLYPykeXUsAAPhMi4yGi+eee+6M2x966KFzfq7zzz9fNm3aZLqS/ve//8m4ceNMPUxV3XfffTJt2jS3Fhm7woxrLhmCDAAAvtMis3jxYlmxYoVbC023bt3k+uuvlxMnTpzTc2mrixYOaw2MtqZ07dpVnn76aWnYsKHk5eVJWlqa2+N11JLeVx5t2XGOgnKe7BJXl2UKAADwuSDzt7/9zVV7ol1Dd911l6mT2bt3r1trSFUUFRWZGhcNNiEhIbJkyRLXfTt27JADBw6Ybi1/UL9kCHbaqXy7NwUAgBqpSl1LGli0jkVpjcxvf/tbM7fMhg0bXIW/le0GGjZsmCngzczMlLfffluWLVsmn3/+ualvmTBhgglGsbGxpmVl8uTJJsSUV+jra2IiQsx52km6lgAA8Jkgo91BJ0+eNJe/+uor+eMf/2gua+A4l1FCKSkp5mePHDligotOjqchxjmce+bMmRIYGGjmqdFWmiFDhsjzzz8v/iK6JMhk0CIDAIDvBJmBAwealpIBAwbImjVr5N133zW361BsneSusnSemIrovDRaf6MnfxRTp6RFhiADAIDv1MjoiKXg4GAzykiXGWjSpIm5XZcXGDp0qNXb6LdcQeYkQQYAAJ9pkdGalkWLFp1xu3YF4VfREcXFvhk5+VJY5JCgwAC7NwkAgBqlSkHGOROvrrP0448/muudOnWS3/3udxIUFGTl9vk1Z42MLn6dmZMvMXWKgw0AALAxyOzevduMTjp06JCZ0E7pHDA68dwnn3wirVu3tmjz/FtocKDUDQ2S7LxC071EkAEAwAdqZO644w4TVnQVbB1yrSed36Vly5bmPpzZKpNOwS8AAL7RIqNLCOgK1Drc2knXW5oxY4YZyYRfRdcJlcPpOYxcAgDAV1pkdBkAncDudFlZWWaOGfyKSfEAAPCxIKMz+U6cOFFWr14tDofDnLSFRlev1oJfnDkEm64lAAB8JMg888wz
pkZGlwvQSev01L9/f7P441NPPWX9Vvox5pIBAMDHamRiYmLko48+MqOXnMOvO3ToYIIMyp5LhiADAICNQeZsq1ovXbrUdfnJJ5+s3lbVIIxaAgDAB4LMxo0bK/W4gABmry27RoZiXwAAbAsypVtcUJVRS7TIAADgE8W+qLxoVsAGAMBjCDIeFkOxLwAAHkOQ8VKLTMapfDPfDgAAsA5Bxks1MnmFRXIqv9DuzQEAoEYhyHhYndAgCQkqHslF9xIAANYiyHiYDkdnUjwAADyDIOPNZQqYSwYAAEsRZLxYJ5NOiwwAAJYiyHgByxQAAOAZBBkvYFI8AAA8gyDjBUyKBwCAZxBkvICFIwEA8AyCjDdHLdEiAwCApQgyXiz2JcgAAGAtgowXMGoJAADPIMh4QUyd4mJfggwAANYiyHhxQry0kxT7AgBgJYKMF4t9s/MKJa+gyO7NAQCgxiDIeEG98BAJKF4Am+4lAAAsRJDxgqDAAKkXFmwuM5cMAADWIch4CQW/AABYjyDjJUyKBwCA9QgyXsKkeAAAWI8g4+WuJVbABgDAOgQZL88lk85cMgAAWIYg4yUsUwAAgPUIMt4u9iXIAABgGYKMl1DsCwCA9QgyXkKxLwAA1iPIeLlriWJfAACsQ5Dx9grYtMgAAGAZgoyXa2QyTuVLUZHD7s0BAKBGIMh4SVRJkNEMk5lbYPfmAABQIxBkvCQ8JEgiQoLM5XRGLgEAYAmCjC1zyVDwCwCAFQgyXsRcMgAAWIsg40XM7gsAgLUIMl7EeksAANSgIDN9+nTp1auX1KtXTxo0aCBXX3217Nixw+0xOTk5MmnSJImLi5PIyEgZNWqUJCcniz+KiSie3ZdJ8QAAqAFBZvny5SakrFq1Sr788kvJz8+XK664QrKzs12PmTp1qixcuFDmz59vHn/48GEZOXKk+HXXEjUyAABYIlhstHjxYrfrc+fONS0z69evl4svvljS09PllVdekbffflsGDRpkHjNnzhzp0KGDCT99+/Y94zlzc3PNySkjI0N8RTQ1MgAA1NwaGQ0uKjY21pxroNFWmsGDB7se0759e2nWrJmsXLmy3O6q6Oho1ykxMVF8rWuJFhkAAGpYkCkqKpIpU6bIgAEDpHPnzua2pKQkCQ0NlZiYGLfHJiQkmPvKct9995lA5DwdPHhQfK/YlxoZAAD8vmupNK2V2bp1q6xYsaJazxMWFmZOPr0CNl1LAADUnBaZ22+/XRYtWiRLly6Vpk2bum5v2LCh5OXlSVpamtvjddSS3udvmBAPAIAaFGQcDocJMR9++KF8/fXX0rJlS7f7e/ToISEhIbJkyRLXbTo8+8CBA9KvXz/x5wnx9G8HAAB+3LWk3Uk6Iumjjz4yc8k46160SDciIsKcT5gwQaZNm2YKgKOiomTy5MkmxJQ1YsnXxdQpLvbNKyiSnPwiiQgtXkQSAAD4YZCZPXu2Ob/00kvdbtch1jfddJO5PHPmTAkMDDQT4emw6iFDhsjzzz8v/qhuaJAEBwZIQZHDLBwZERph9yYBAODXbA0yleleCQ8Pl1mzZpmTvwsICDDdS8ey8uR4dp40iibIAADg98W+tUlc3eIRVRpkAABA9RBkvCy2bnGdDEEGAIDqI8h4WWxkcZBJzSLIAABQXQQZL4svaZFJzf51PSgAAFA1BBkvi6VGBgAAyxBkvCyupGtJRy4BAIDqIch4WRzFvgAAWIYg42VxkXQtAQBgFYKMTcOvj2VR7AsAQHURZLwsvqRGJjOnwKy5BAAAqo4g42VR4SESFBhgLtO9BABA9RBkvCwwMEDql6yCzVwyAABUD0HGxu4lZvcFAKB6CDI2YL0lAACsQZCxcQh2KkEGAIBqIcjYOCleKkOwAQCoFoKMDZjdFwAAaxBkbBDLeksAAFiCIGNriwxdSwAAVAdBxgYU+wIAYA2CjJ3Dr+laAgCgWggyNoivW9wik5lbILkFhXZvDgAAfosgY4OoiGAJZr0lAACq
jSBjg4CAAFf3EssUAABQdQQZm7iCDC0yAABUGUHGJvElI5cYgg0AQNURZGxC1xIAANVHkLEJXUsAAFQfQcYm8SXLFDCXDAAAVUeQsUlsyVwyqdTIAABQZQQZm8SVtMjQtQQAQNURZGxeOJJiXwAAqo4gY/PCkczsCwBA1RFkbB61lJVbIDn5rLcEAEBVEGRsEhUeLCFBrLcEAEB1EGR8YL0lggwAAFVDkPGBIdjHshiCDQBAVRBkfGFSPFpkAACoEoKMjVhvCQCA6iHI2CjONbsvQQYAgKogyPjC7L7UyAAAUCUEGR+Y3ZcaGQAAqoYg4ws1MgQZAACqhCDjA8sUsAI2AABVQ5Dxha4lRi0BAFAlBBkbxZYU+2bnFbLeEgAAVUCQsVG9sGAJDSr+L6BOBgCAc0eQ8ZH1lkoPwd6dkikLNx8Wh8Nh49YBAOD7gu3egNpO55JJyshxtchsO5wuo19cJZm5BaaGpn+beLs3EQAAn0WLjM1cK2Bn5cmB1JMy7tW1JsSopTtSbN46AAB8m61B5ptvvpHhw4dL48aNTTfLggUL3O7XrpUHHnhAGjVqJBERETJ48GDZtWuX1CTxJUOwdyRnyo2vrjYrYWvtjPp21zGbtw4AAN9ma5DJzs6Wrl27yqxZs8q8/7HHHpNnnnlGXnjhBVm9erXUrVtXhgwZIjk5OVLTWmRe+maP7E89KYmxETL/tn7mtp+SMuVoJnPMAADgkzUyw4YNM6eyaGvMU089Jf/85z9lxIgR5rbXX39dEhISTMvN6NGjpSatt6TiI0PljZv7SIv4utKpcZRsO5wh3+0+Jld3b2LrNgIA4Kt8tkZm7969kpSUZLqTnKKjo6VPnz6ycuXKcn8uNzdXMjIy3E6+rFF0uDmPDAuWueN7mxCjBrYtLvKlewkAAD8MMhpilLbAlKbXnfeVZfr06SbwOE+JiYniy4Z2aiR3XtZW5k3sK52bRLtuv6jNeeZ8xe6jDMMGAMDfgkxV3XfffZKenu46HTx4UHxZRGiQTL28nVuIUT1b1Jew4EBJzsiV3SlZtm0fAAC+zGeDTMOGDc15cnKy2+163XlfWcLCwiQqKsrt5I/CQ4Kkd8tYc5nuJQAA/CzItGzZ0gSWJUuWuG7TehcdvdSvX/GonppuYMlkeFrwCwAAfGzUUlZWluzevdutwHfTpk0SGxsrzZo1kylTpsjDDz8sbdu2NcHm/vvvN3POXH311VIbDCgJMqv2pEp+YZGElKzLBAAAfCDIrFu3Tn7zm9+4rk+bNs2cjxs3TubOnSt33323mWtm4sSJkpaWJgMHDpTFixdLeHjxSJ+armOjKLNMgS5fsPFAmqurCQAAFAtw1PAhMdodpaOXtPDXH+tlJr+z0SwgecegNjLtivPt3hwAAHzq+E1fhY+7qKR76VvqZAAAOANBxscNKJkYb/PBNEk/lW/35gAA4FMIMj6uSUyEtIqvK0UOkZU/p9q9OQAA+BSCjB9wLlegs/wCAIBfEWT8aD6ZVXuO270pAAD4FIKMH+jWLMac/3w0S07mFdi9OQAA+AyCjB9oUC9cEqLCRAfKbz9c+dW8i4ocFAgDAGo0WyfEQ+V1bhwtyRkpsvVQuvRsUf7EeAePnzRLGuhw7e93H5MTJ/Plueu7y2+7NPbq9gIA4A0EGT/RqUm0LPkpRbaW0yKTk18o1720ygzTPt2Ly/cQZAAANRJdS37igibR5lxbZMqyck+qCTFBgQHSs3l9ufOytjJnfC8JDQqULYfSy/05AAD8GUHGT3RuUjw9866ULNP6cro1e4tHNF3TvYn877b+MvXydvKb8xvIFZ0SzO3vrDng5S0GAMDzCDJ+omFUuFlAsrDIIT8lZZ5x/+o9xZPl9TltYcnrezcz5x9tOsyIJwBAjUOQ8RMBAQHSuZzuJQ0oP/xSfFvfVnFu9+n15nF1JCu3QBZtPlKp3/XgR1vlxldWl9nyAwCALyHI+GH30ulBZsP+NCkockjj6HBp
Wj/C7b7AwAAZ3au4VeadtWfvXjqamSuvrdwv3+46Jp9vS7J0+wEAsBpBxs+GYKuth92DzOq9Jd1KreJMy83pft+jqQQHBsjGA2nyU1LF89B8u+vXZRA+2HDIoi0HAMAzCDJ+xNm1tCMpU/IKily3ry5ZuuD0+hin8+qFyeUdi4t+5605WOHv0JaYXy8flZTMHEu2HQAATyDI+BHtNoqOCJH8QofsTC4u+NU6lk0lc8doi0x5RpcU/X6w4Zdya190JmBni0xUeLBZcfvjTYc98JcAAGANgozfFfy618lod1FeYZFpdWkRV6fcn72oTbw0iYmQjJwC+XRL2UW/249kyLGsPKkbGiRTBrczt324ke4lAIDvIsj4GdfIpZI6GVd9TMvYMutjShf9jumdWOGcMt+UtMb0ax1n5qMJCQqQbYczXK0/AAD4GoKMnxb8bjmU4V4fU0G3ktMfeiaamX/X7jtR5uKT3+4sro+5qO15Ur9uqFx6fgNznaJfAICvIsj4aYvMj0cyzPwxGw6cMNf7llPoW1pCVLhceUEjc/mVFXvd7svOLZB1+4tD0cXtzjPnI7s3MecfbTpk6mcqkpGTL7e+vk7eXs0MwgAA7yHI+JnmsXWkXliwGbX0/oZDkltQZGb8bdMgslI/P2FgS3P+8eZDkpLx64ikVXtSTRFxYmyEq9ZmUIcGpuj3SHqOub8iCzYeki+3J8vDn2yXzJz8av2NAABUFkHGz2itS8fGxQW/c0paVXqfpT6mtG6JMdKrRX0TWl5fuf+MYdfareR8rrDgILmqZNXsD85S9PvFtmRzfjKvUD7ezEgnAIB3EGT8uHtpz7HsCuePKc+Ega3M+Zur98upvOKh2N/sLC70vbhtcbeS08gLi7uXPttyxPXY06WfyndrsTnbXDUAAFiFIOOHLigJMk69W5690Lc0nRyvWWwdSTuZL+9v+EUOHj9pQpEWAvdv4/5cPZvXN91N2XmF8sX2spcsWLYjxSyRoPPchAYFypZD6bKlZO0nAAA8iSDjh5xzySidIK99w3rn9PMaWG4e0MJcfnXFXlle0hrTPTFGosJD3B6r3UzXdCtulXlv3cEKu5VGdGssQzs3LHddJy0YfnH5z6Z1BwAAKxBk/FDL+EipExpkLvdqEWvqZs6VDsWuFx5sWmKeWbLLbbTS6a7tlSj6K77bnXrGsG2dJVhbZNQVHRvKmJIZhHVGYB0JVdrs5T/L9M9+kklvb5C1+4pHSAEAUB0EGT+kLSqdSgp++7Y6t/oYp7phwXJ9n+LQkZKZa84vahtf5mOb1q8jw0qGbf93xR63+1b+nGq6nRpGhZsuL92elvF1JSu3QBb98GvRrw4Tf/LLneayjuSeMm+TpJ9kdBMAoHoIMn7q3mEdZFy/5q4WkKq4qX8Lsyq2s4uqS9OYch9760XFBcILNx+W5FLDtr/Ynuyqu9GWIe2KGt2reAbht0uKfrUY+I53NkphkUOGdmoozePqyKG0U/L3D7eIw1Hx/DQAAFSEIOOnejSvLw+N6GxaVqqqUXSEXNWluKVlYJt409JTmWHbr32/z1XzonPHqCs6Fa+urUb1aGqWN9h8MM10Rf3jwy3yy4lTpmj4sT90kWdGdzcB6pMtR8qtu/F3BYVFsnRHiny+LcltpXIAgLUIMrXcP67qIDcPaCl3Dz3/rI+9paRV5q3VB8yswhsPpsmxrFxTa9On1Mip+MgwuaJTcdHv7e9skEU/HDHBRQOMFhN3TYyRu64o/n3/+ni7/Hw0S7xNW4L2p2bLJz8ckQ83lr8i+Lk6kn5KZn65Uwb+31IZP2et/OmN9XLp40tNUbXuM3+QW2DNvkDFNOBqkPfE63/TwTS54b+r5Y2V+yS/kCBd2+QWFNaqL1ABjhretp+RkSHR0dGSnp4uUVG/jvbBudOuoUFPLJP9qSflod91ksPpp+TF5Xvkd10byzNjurs9dsWuY3LDK6td1+8Z2l5uu7S167q25tz46mpTQKz1
Pi/c0MMM367sxH5OWlD89U8pJpCsP3BCBrSOk39c1dGsBn66lMwc05q0fv8JsxhmZs6vweL8hHrmbzj/HEeAKX0Lrd57XP777R6zLc7VHGLrhppWrqMlNUj164TI+AEt5Q89m5rWsLLorMjr9p0w+6RBVLh42//W/yJ//2CL9GkVKzNGdTErptc2Ol9SREkxvSd/x5/fXG9GDOoXgXdu7euaH6q6NLgMfeob+flo8TxTrc6rK/cN6yCDOzQ45/cX/M/+1Gy5/uXVciq/UJ66rlu5gzhq0vGbIINz8vrKffLAR9vMPDR6kN57LFueu767/LZkBuDSQeXS/yyTA8dPmiLi18b3PmN0ldba6AfuiZKi35g6IaZguFPjaGkcE26Ck4YCfa4iR8nlkpervmy3Hsow3Te6TENpuqzC36/sINf2TDS/U8PBy9/uNUFDZx520jlv2jeqJ4fTTsmxrDwJDQ6Uf1zZQf7Yr3mlPvB1G5btPCqzvt4t6/YXr3nlnKBwbN/mMqRTgujm6lw9Gvh0XzjpkPlLzj9PLm3XQBpEhcmyHUfl65+SZc3e46b7Tlu13pjQWzo0Ovtr9kR2nnz/c6pZf0sPwJFhwcWn8GBzcKxsGNHff+vr681+V/oc9/+2eD/WhgOg/n/+c8FWmbf2oEy8uJX89YrzK+xurSp9PU54bZ35v3bSkDtvYr8qBeny3qNa96YtoanZea7X5T+v6igXNLUmMNnZbavzVoWHeDZs+qPDaafkDy+sNDWISl++fx1yvtx2SWu/fA8TZEoQZKyl3SP9pn9tCnidYWD9/YOl3mnzz6jvdx+ThT8cNt1IemAui36YP/LJdtl+JMMcwKtCi4evuqCRqeN55utdJuCo3i1i5TftG5gA4/ww126tsb2bmQN824RICQkKNN1jf5u/WZbuKJ5PZ1D7BjJj5AXltohoN5TWBr34zc+u36X74fc9m5puurLWvdIP30+3JpkWIR3BVdG7LiIkyHyb0kA2Z3wv6dHcfWSavmW1VWnJTymm5Wvr4fRyn0+3629DzjdrbFU0TH/jgRMy5uVVkpNfZPaldpFtOJBm7vvN+eeZ1hlddLQ8qVm55v8yTw8yhQ4ThvRgo7VSWsdlTqFB5uCjzd76e3Q/6nlcZKh0bRpzRiuIHvC/+jFZPtuSJDkFRWY7hnRqKI091Er07JJd8kTJyDrn6+Cp0d3OmFupOtJO5sm4V9fI5l/SzZppz1zfXZ76apepJ9P3yLt/6iutz4t0taxoS6MGYW1hvP03baRVyX3l0fflb/6zTI5n58m/R3SSEd2byAvLfjaLxGrg15eAPs/ky9qa17436WsqK6fA/H1VmTJC6Wtn7MurzefF9JEXyIiSOa4gpsX5uhdXmS+XOnL0wmb1zWtH6SCL/1zb1Xw5qci+Y9nm50+czDNfMPX1qj+jX8zO9rOeQJApQZCx3uOf/ySzlv5sLl96/nkyd3zvaj+nfkDtTMoyswLrgVmHZusXCP1GHBigo6H020WA6MefXtZL2pKhBzbthnF+29DAMPf7ffLEFztNGHBqFV/X1AHp48v6ZqJvAw0Zj372k+lb1s/Zni1i5fIOCTK4ZCZkHWq+YNMhWbw1yQwvd4aOsX2aya0Xt6rwQH96C8o3u47K8h1HTdeCHny0K2dQ+wRz8IytEyo3v7bWhBV9/hdu7CGXtDvPbKN2XT23dLdsLAkZTu0SIk3g0cdol1lmboFZFPSnpEzXt/Enru1qhtKfTms0fj/7e/PBpb/nv+N6mn2tAVD3o4YTPehqK8X4gS3dPtB0X+l+07mI9HdWVXDJGmJaxK7/V9/sOmb2TVn9/F2aRpv/R23x0oPi6f+f2m2jAVpDgIafcf1bnPXbqC56OuXdTeby73s0NaPz9MDf+ry68t9xvcyBobI0ROhrUJ9Dw2jrBpEm3LaMqytPL9ll/k+0Beb1m/uY1hF9rWuI1IOzTmMw9+Ze8v3uVBM+nN+slb4Xfn9hU7lj
cNtyW9mmf/qjvPjNHrPdn0+5WIJLwop+U9c5nHSbVNem0TLzum6uYKStnqv2psr76w+Zmb4TY+tIy/g60iK+rrSIq2sCaHE4LTLnGu70vsp++Xnyi53y6nd7TauqtrzqjOH6/tIBBBc0iTGtoZXx2OKf5PllxZ89Sl+Tdw853/V3Wsm0uO44amr8dKJPX+6aO56dJ6NfWik7k7PMa2P+n/tJo+hweWfNQXnw463mS+LZXsv6Pn7w421l3qcLCT89urv5IuhNBJkSBBnraZfQwP/72rw5Hr3mAtd8NL7klxMn5f8t3C47kjPlTxe3lmt7Nq3Uh91PSRly7/tbTLFkaToBYeluKf2w0NFZOoRda2GqSg8ghQ5tuQg848P/tjc3mIO5tmrcdmkb+Wp7sjnYqbDgQBnWuaHp/x7QJr7MEKVvbe0m+fei7WbbNYA8MLyj6eoLDgw0z6uhRw+iOqpMD25v39rXbSTcruRMuWv+ZvmhZMkJ/Vu1mfrGfs1l5Z5U89x7StVi6IdnkD63BtDAANOqoHVM2bmFkp1XYFphdDHS8JBAE9L0sna5JZUa0l+afvjqwqUapHSJDO3CK/2JpaHn8k4JZjJG/T+at+aAWeC0dP2TdhU+OLxTud1Eq/ekyo2vrDGBTQ+M2i35wy9pMvH19Wa7NIw8OvICM8WAbm95NCy8/O0es9ZY6RB9ugb1wuTNW/pIu4R6bi1ao19aJbtS3At/4yNDZWyf5rL1ULppgXO2sul77s7L2kr9Uq89DSCXPbHc/B2v3tTTBOPTaZDRUYQZOQVm/2trnQZp/eaur4FzoV9i7rr8/Aq7qvT16xy16Hzdnt4VrLfpAVJDTXG4iS3z2//6/cdNt4mGIQ2xn5fMKK6v52fHdJeYOlV/H55Ou2kf/fRH12K6Srt57xjUxoToqrYoecKJ7DxTb6itwwlRYTL/T/2lWdyvX1i0Bfi2N9dLckaueT29MaHPGV3WOufX5Hc2mveW1gtqC6AGzvp1QmXJj8lyOD3HfNmYdkU783nqiS7XshBkShBkPENH4Xy3+5hpei+rW8nf6UFB38Bf/ZhiFsTUbhJ9Y2u3y9Xdm0iPZvU9/mGmrRFT39tkWhac9GB9Y9/mMuGiltKgXnili/+mvbfZtPCUR79xvX9bf4krowtQv4HrB512gWizs9IDjbNVSj8c7x7S3rRkVHWfaMuDbt/6fcdNkeqFzWJMgNGWptLfgrVwWrubdFi7tlroQbss2oKmkzPOX/+L+XDWA59+ozy9rkJbo0Y+/705mGswnHX9ha6/QVu0tCDX2cWmf7N2VWoz/UXt4iUlI1d2JGWa8KvTDOhBW18nSlsJNRRp6NidkmUCip7Xrxtiwn/zuDO/Fevvu/bFlbIv9aT51qxzN+mirc5t1v3zxBc7TD2U0lYdLeJ17nedMVtfKzqVgtZXldd6oF08d7232fU8ThoWf9u1sfRuWV8OnThlZv3WrgYNmvp36YFMD2BBAQGSnJnrqqW6omOCOcDpAVBb5ZLSc+RIeo5p5fpw4yFX8H/4ms5m27TQft2+42Z277X7TpjWhNL0fab/V9o66KRh+MpnvjUDDXSfPHltN/Oa/Nv8H0xo1P/vh0Z0kv6t4yoMmxXRv0fD6Kylu81oMv3z9P9PA6z+3zpf7/qa1AEOSr/MFbdS6XQW4aabW/9vdeCCfjnR97D+fdp9rd3bGliLr+fJ8excE7hNC3NJa7Pu38s6JLievyL6pWDOd/tk9rLdJpjG1Q2Vd//Ur8yube12uunVteaLkNZOvX5zb1fripYA3DRnrXkvaejXgRylXzvaYvj3BVtcn0P6vtLWvPIGLFiJIFOCIIPqysjJNx/o7RtGVboJ3Cr64frwJ9tNd5YuKzG+fwu3b+Hn8jwvLC+uldDak9L1SNqqod2Dpb/FlUW77T7YcMh0j2jw0AObtkhpV4eVdSSVpQcW7Z7TlhrtctMuJT3oaGvFgNbx5uCuH74aBvWAouFI
m9a1YFwPpDrS7NMtR8w3Va2vmjex7xlBR7s8Z365Sz7Y8ItrBuyK6If8Xy5tY1oJqtINoa+1nUmZ0r1Z/XK/9eqB5/8t2u7qNtRuGh0Jd8/7W8xB8dM7Ljprkbi2BGpXj3ZTaoG9hiFtaahsAa2+H7Q7Ubta9YCvv7dOSJCZ5bs0/RNu6t9S7rqiXZlzXunhRwNTcbA5Yf42/favz6fF1tr6p/+P2qqj0z40jg6XxVMvdr3eNEBOfGOdq8VH67C0lVLDQMdGUbIjOUO2/JJhuqt/PJxh/u/1QB5dJ1SiI4LN36shQ0OpBg1nOFM6x9a9Q9ubbjatFXn1u30y5zt9/5y9C1X/73R/VLW7dUzvRNOKWNb/h74PtQVNX5fOlszzE+rJ02O6mc+o8mggGTdnjWlt1lCu9XfaKqctgfpeuvKChvLsmAvLfN3p/5N+KfjXx9tM6662Kr4+oXeFv88KBJkSBBngTPq212/ZWpirTfvn0pKiB/evf0wxI2zOVnzqLdqFpQehsj74tQj5ltfWmm+teqA7/WCr3+Y/+Ev/cgvSnQf+Tb+kyedbk0xrkLaa6EGgXcN60qFhPbMvtEvEqiHUlfl75363T2Z+tdOty1Nn1dbCbG/ZnZIpM7/a5dZqqC0qWuujrRPaJaohsbK0leGhhdtMbYeztWd418am20O9fUsf6d8m/oyulSe+3GG6mpxTHVSVBihtbb13WHvTzXU6bbl7e/UB07qmXbPBegoMdLV0aYvRvtRsU8TupIFfu2T1pK8xPdcCd21BcbZm6/tRD8QHj5+SOd/vNa2IWgs2+4YernooDSL/2/CLvLlqv6tlVO+bdnk700pcme4eDSwT5q41IV5fv3XDNMjlmQCuX2bOFmT19/75jfWmy167XF+5qZd53XsKQaYEQQaA1vpo87mzeFa/wfZuGSu9WsaaguBz6R7Vj0wtjI6JCLG9VkK7QrQWbPG2JPMt++u/XlLpLkert0NrXzTAWDEHzztrDsiDH21z6zocP6CFaaWoKGzqYAFnl7B2D2vA1HBZfIoyB++0k/kmkKSdypecvEITKnSf6eABDRfVLRzW14e23mloiK8bJlERwefUOvfNzqNyx7yNZjs19Gir0Jp9x10F6M6wqKPPbujb/JyHoWvL5Z/eXG9+j9LWOx0tV9lWVQ1UE15ba+rV9EvQ7BsuLLMeywoEmRIEGQBKuwf0QKddKVYWhvoCHb6tE+v5SguZFbQLRItUtd5G6z4WTR5Ya+aO0RB221vrXdM7lJ5/SsOLtsBUZzh0bkGhmVVduwifHt3tnCff1DCkNVnapastQY+N6mIGP1iNIFOCIAMA/knrV7RoeNgFjWrdLNPObraFm4+Ytex09JrWefnKEPD8wiK55/0fTN2c+tfwjnLTgJaW/g6CTAmCDADAX+kh2lfCS1ndeTpM/bWV+2TOTb1lYFv3+iVvHb+9P1UfAACoFF8NMcqMKLuqg4zu3azMYd/ewurXAACgykHLzhCjCDIAAMBvEWQAAIDfIsgAAAC/RZABAAB+iyADAAD8FkEGAAD4Lb8IMrNmzZIWLVpIeHi49OnTR9asWWP3JgEAAB/g80Hm3XfflWnTpsmDDz4oGzZskK5du8qQIUMkJSXF7k0DAAA28/klCrQFplevXvLcc8+Z60VFRZKYmCiTJ0+We++994zH5+bmmlPpKY718SxRAACA/6jsEgU+3SKTl5cn69evl8GDB7tuCwwMNNdXrlxZ5s9Mnz7d/OHOk4YYAABQM/l0kDl27JgUFhZKQkKC2+16PSkpqcyfue+++0x6c54OHjzopa0FAADeVuMWjQwLCzMnAABQ8/l0i0x8fLwEBQVJcnKy2+16vWHDhrZtFwAA8A0+3SITGhoqPXr0kCVLlsjVV1/tKvbV67fffnulnsNZy6xFQwAAwD84j9tnG5Pk00FG6dDrcePGSc+ePaV3797y1FNPSXZ2towfP75SP5+ZmWnOKfoFAMD/6HFc
B+/4bZC57rrr5OjRo/LAAw+YAt9u3brJ4sWLzygALk/jxo1NwW+9evUkICDAsu1yDuvW52ZYt+exv72Hfe097GvvYV/7377WlhgNMXoc9+t5ZPx9fDuswf72Hva197CvvYd9XXP3tU8X+wIAAFSEIAMAAPwWQaaKdK4aXf+JOWu8g/3tPexr72Ffew/7uubua2pkAACA36JFBgAA+C2CDAAA8FsEGQAA4LcIMgAAwG8RZKpo1qxZ0qJFCwkPD5c+ffrImjVr7N4kvzd9+nTp1auXmYW5QYMGZn2tHTt2uD0mJydHJk2aJHFxcRIZGSmjRo06Y1FRnLsZM2aYma+nTJniuo19bZ1Dhw7JDTfcYPZlRESEXHDBBbJu3TrX/TrmQmcvb9Sokbl/8ODBsmvXLlu32R8VFhbK/fffLy1btjT7sXXr1vLvf//bba0e9nXVfPPNNzJ8+HAzy65+VixYsMDt/srs1+PHj8vYsWPNJHkxMTEyYcIEycrKquIWuf9ynKN58+Y5QkNDHa+++qpj27ZtjltvvdURExPjSE5OtnvT/NqQIUMcc+bMcWzdutWxadMmx5VXXulo1qyZIysry/WYP//5z47ExETHkiVLHOvWrXP07dvX0b9/f1u329+tWbPG0aJFC0eXLl0cd955p+t29rU1jh8/7mjevLnjpptucqxevdqxZ88ex+eff+7YvXu36zEzZsxwREdHOxYsWODYvHmz43e/+52jZcuWjlOnTtm67f7mkUceccTFxTkWLVrk2Lt3r2P+/PmOyMhIx9NPP+16DPu6aj799FPHP/7xD8cHH3ygqdDx4Ycfut1fmf06dOhQR9euXR2rVq1yfPvtt442bdo4xowZ46gugkwV9O7d2zFp0iTX9cLCQkfjxo0d06dPt3W7apqUlBTzhlm+fLm5npaW5ggJCTEfTk4//vijeczKlStt3FL/lZmZ6Wjbtq3jyy+/dFxyySWuIMO+ts4999zjGDhwYLn3FxUVORo2bOh4/PHHXbfp/g8LC3O88847XtrKmuGqq65y3HzzzW63jRw50jF27FhzmX1tjdODTGX26/bt283PrV271vWYzz77zBEQEOA4dOhQtbaHrqVzlJeXJ+vXrzfNZk6BgYHm+sqVK23dtppG1+lQsbGx5lz3e35+vtu+b9++vTRr1ox9X0XadXTVVVe57VPFvrbOxx9/LD179pQ//OEPpsu0e/fu8vLLL7vu37t3r1kQt/S+1nVqtMuafX1u+vfvL0uWLJGdO3ea65s3b5YVK1bIsGHDzHX2tWdUZr/quXYn6XvBSR+vx8/Vq1dX6/f7/OrXvubYsWOmH/b01bf1+k8//WTbdtU0RUVFpl5jwIAB0rlzZ3ObvlFCQ0PNm+H0fa/34dzMmzdPNmzYIGvXrj3jPva1dfbs2SOzZ8+WadOmyd///nezv++44w6zf8eNG+fan2V9prCvz829995rFizU0B0UFGQ+qx955BFTl6HY155Rmf2q5xrkSwsODjZfVKu77wky8NmWgq1bt5pvU7DewYMH5c4775Qvv/zSFKzDs6Fcv4U++uij5rq2yOhr+4UXXjBBBtZ577335K233pK3335bOnXqJJs2bTJfiLRAlX1dc9G1dI7i4+NN0j999IZeb9iwoW3bVZPcfvvtsmjRIlm6dKk0bdrUdbvuX+3aS0tLc3s8+/7caddRSkqKXHjhheZbkZ6WL18uzzzzjLms36TY19bQURwdO3Z0u61Dhw5y4MABc9m5P/lMqb6//e1vplVm9OjRZmTYjTfeKFOnTjUjIhX72jMqs1/1XD9zSisoKDAjmaq77wky50ibg3v06GH6YUt/49Lr/fr1s3Xb/J3WkGmI+fDDD+Xrr782QyhL0/0eEhLitu91eLYeENj35+ayyy6TLVu2mG+szpO2GmgTvPMy+9oa2j16+jQCWsPRvHlzc1lf5/pBXnpfa/eI1g2wr8/NyZMnTc1FafrFUz+jFfvaMyqz
X/Vcvxjplygn/ZzX/xutpamWapUK1+Lh11qNPXfuXFOJPXHiRDP8Oikpye5N82u33XabGb63bNkyx5EjR1ynkydPug0J1iHZX3/9tRkS3K9fP3NC9ZUetaTY19YNbw8ODjZDg3ft2uV46623HHXq1HG8+eabbkNX9TPko48+cvzwww+OESNGMCS4CsaNG+do0qSJa/i1DhWOj4933H333a7HsK+rPsJx48aN5qTR4cknnzSX9+/fX+n9qsOvu3fvbqYhWLFihRkxyfBrGz377LPmQ17nk9Hh2DouHtWjb46yTjq3jJO+Kf7yl7846tevbw4G11xzjQk7sD7IsK+ts3DhQkfnzp3NF6D27ds7XnrpJbf7dfjq/fff70hISDCPueyyyxw7duywbXv9VUZGhnkN62dzeHi4o1WrVmbuk9zcXNdj2NdVs3Tp0jI/nzU8Vna/pqammuCic/tERUU5xo8fbwJSdQXoP9Vr0wEAALAHNTIAAMBvEWQAAIDfIsgAAAC/RZABAAB+iyADAAD8FkEGAAD4LYIMAADwWwQZAADgtwgyACzTokULeeqppyr9+GXLlklAQMAZi1PWVOe6fwCcXXAlHgOghrr00kulW7dulh1c165dK3Xr1q304/v37y9HjhyR6OhoS34/gNqHIAOgQrqKSWFhoQQHn/3j4rzzzjvn1eR11VwAqCq6loBa6qabbpLly5fL008/bbp39LRv3z5Xd89nn30mPXr0kLCwMFmxYoX8/PPPMmLECElISJDIyEjp1auXfPXVVxV2nejz/Pe//5VrrrlG6tSpI23btpWPP/643K6luXPnSkxMjHz++efSoUMH83uGDh1qWm2cCgoK5I477jCPi4uLk3vuuUfGjRsnV199dYV/r/4NF110kUREREhiYqJ5juzsbLdt//e//y1jxowxrUpNmjSRWbNmuT3HgQMHzD7Q7YqKipJrr71WkpOT3R6zcOFCs2/Cw8MlPj7e/O2lnTx5Um6++WapV6+eNGvWTF566aVK/o8BKAtBBqilNMD069dPbr31VhMU9KQHeKd7771XZsyYIT/++KN06dJFsrKy5Morr5QlS5bIxo0bTcAYPny4ObhX5KGHHjIH/B9++MH8/NixY+X48ePlPl4P9P/5z3/kjTfekG+++cY8/1//+lfX/f/3f/8nb731lsyZM0e+++47ycjIkAULFlS4DRrCdHtHjRpltuPdd981web22293e9zjjz8uXbt2NX+f/v133nmnfPnll+a+oqIiE2J02zUA6u179uyR6667zvXzn3zyiQku+nfqc+i+6t27t9vveOKJJ6Rnz57m/r/85S9y2223yY4dOyrcfgAVqPb62QD81iWXXOK488473W5bunSpQz8aFixYcNaf79Spk+PZZ591XW/evLlj5syZruv6PP/85z9d17Oyssxtn332mdvvOnHihLk+Z84cc3337t2un5k1a5YjISHBdV0vP/74467rBQUFjmbNmjlGjBhR7nZOmDDBMXHiRLfbvv32W0dgYKDj1KlTrm0fOnSo22Ouu+46x7Bhw8zlL774whEUFOQ4cOCA6/5t27aZ7V2zZo253q9fP8fYsWPL3Q79HTfccIPrelFRkaNBgwaO2bNnl/szACpGiwyAMmmrQWnaIqMtI9rlo9062r2irTVna5HR1hwn7bLRLpmUlJRyH69dUK1bt3Zdb9Sokevx6enppiundCtHUFCQ6QKryObNm023lW6z8zRkyBDTyrJ3717X47SFqjS9rn+j0nNtsSrdatWxY0ezL5yP2bRpk1x22WWV3h/araY1QhXtDwAVo9gXQJlOH32kIUa7U7Tbp02bNqbW5Pe//73k5eVV+DwhISFu1/XgrQHiXB5f3LhTdRrC/vSnP5m6mNNpnYpVdJ+czbnuDwAVo0UGqMV01JCOSKoMrUfRAmGtAbngggtMS4IWB3uTDtPWYmMd5u2k279hw4YKf+7CCy+U7du3mwB2+kn3
gdOqVavcfk6vawuU0vODBw+ak5M+pxYqa8uMs7VF62IAeA8tMkAtpiN1Vq9ebQKJdrfExsaW+1gdcfTBBx+YAl9tRbj//vttaUmYPHmyTJ8+3YSQ9u3by7PPPisnTpww21QeHdnUt29fU9x7yy23mNYmDSHawvTcc8+5hbXHHnvMjIDS++bPn28KeNXgwYNNgNNiZR2ZpaOntFj3kksucXXDPfjgg6ZrSbvGRo8ebR7z6aefmt8PwDNokQFqMe0u0hoTbVHQOWAqqnd58sknpX79+mYSOw0zWmOiLR3epqFAh0j/8Y9/NDUsznoXHe5cHm0p0ZFGO3fuNEOwu3fvLg888IA0btzY7XF33XWXrFu3ztz/8MMPm79Zn1tpUProo4/MPrj44otNsGnVqpUZAVV6gkENPzrEXCcaHDRokKxZs8aDewNAgFb82r0RAFBV2iqk3T46xFvngalO69SUKVPMCYD/oGsJgF/Zv3+/fPHFF6ZLJzc313QN6cij66+/3u5NA2ADupYA+JXAwEAzlFpnzx0wYIBs2bLFzDDsLMoFULvQtQQAAPwWLTIAAMBvEWQAAIDfIsgAAAC/RZABAAB+iyADAAD8FkEGAAD4LYIMAADwWwQZAAAg/ur/A70TFzU54iK+AAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 为对比学习负采样准备词频率分布\n",
    "vocab_size = len(dataset.token2id)\n",
    "embed_size = 128\n",
    "distribution = dataset.get_word_distribution()\n",
    "print(distribution)\n",
    "model = SkipGramNCE(vocab_size, embed_size, distribution)\n",
    "\n",
    "from torch.utils.data import DataLoader\n",
    "from torch.optim import SGD, Adam\n",
    "\n",
    "# 定义类方法collate_batch批量处理数据，转化为PyTorch需要的张量类型\n",
    "class DataCollator:\n",
    "    @classmethod\n",
    "    def collate_batch(cls, batch):\n",
    "        batch = np.array(batch)\n",
    "        input_ids = torch.tensor(batch[:, 0], dtype=torch.long)\n",
    "        labels = torch.tensor(batch[:, 1], dtype=torch.long)\n",
    "        return {'input_ids': input_ids, 'labels': labels}\n",
    "\n",
    "# 定义训练参数以及训练循环\n",
    "epochs = 100\n",
    "batch_size = 128\n",
    "learning_rate = 1e-3\n",
    "epoch_loss = []\n",
    "\n",
    "data_collator = DataCollator()\n",
    "dataloader = DataLoader(data, batch_size=batch_size, shuffle=True,\\\n",
    "    collate_fn=data_collator.collate_batch)\n",
    "optimizer = Adam(model.parameters(), lr=learning_rate)\n",
    "model.zero_grad()\n",
    "model.train()\n",
    "\n",
    "# 需要提前安装tqdm\n",
    "from tqdm import trange\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# 训练过程，每步读取数据，送入模型计算损失，并使用PyTorch进行优化\n",
    "with trange(epochs, desc='epoch', ncols=60) as pbar:\n",
    "    for epoch in pbar:\n",
    "        for step, batch in enumerate(dataloader):\n",
    "            loss = model(**batch)\n",
    "            pbar.set_description(f'epoch-{epoch}, loss={loss.item():.4f}')\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            model.zero_grad()\n",
    "        epoch_loss.append(loss.item())\n",
    "    \n",
    "epoch_loss = np.array(epoch_loss)\n",
    "plt.plot(range(len(epoch_loss)), epoch_loss)\n",
    "plt.xlabel('training epoch')\n",
    "plt.ylabel('loss')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9430e9a",
   "metadata": {},
   "source": [
    "TF-IDF加权\n",
    "\n",
    "定义词频率（term frequency）。注意到不同长度的文章词频率会有较大差距，不利于比较和运算，因此可以对词频率取对数。\n",
    "\n",
    "$$\\text{tf}_{t,d} = \\log (\\text{count}(t,d) + 1)$$\n",
    "\n",
    "其中$\\text{count}(t,d)$表示词$t$在文档$d$中出现的次数，为了避免对0取对数，把所有的计数加1。\n",
    "\n",
    "那么如何区分高频词与低频词呢？TF-IDF引入了另一个重要的评价指标——文档频率（document frequency），即一个词在语料库所包含的多少篇文档中出现。在所有文档里出现的词往往是虚词或是常见实词，而只在少量文档里出现的词往往是具有明确含义的实词并且具有很强的文档区分度。用$\\text{df}_t$来表示在多少篇文档中出现了词$t$。\n",
    "\n",
    "为了压低高频词和提升低频词的影响，TF-IDF使用文档频率的倒数，也就是逆向文档频率（inverse document frequency）来对词频率进行加权。这很好理解，一个词的文档频率越高，其倒数就越小，权重就越小。\n",
    "\n",
    "$$\\text{idf}_t = \\log \\frac{N}{\\text{df}_t}$$\n",
    "\n",
    "其中$N$表示文档总数。为了避免分母为0，通常会将分母改为$\\text{df}_t+1$。\n",
    "\n",
    "基于词频率和逆向文档频率，得到TF-IDF的最终值为：\n",
    "\n",
    "$$w_{t,d} = \\text{tf}_{t,d} \\times \\text{idf}_{t}$$\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f765e353",
   "metadata": {},
   "source": [
    "很多情况下会额外对文档的TF-IDF向量使用L2归一化，使得不同文档的TF-IDF向量具有相同的模长，便于相互比较。\n",
    "下面给出了TF-IDF的代码实现。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "9ce8e610",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TFIDF:\n",
    "    def __init__(self, vocab_size, norm='l2', smooth_idf=True,\\\n",
    "                 sublinear_tf=True):\n",
    "        self.vocab_size = vocab_size\n",
    "        self.norm = norm\n",
    "        self.smooth_idf = smooth_idf\n",
    "        self.sublinear_tf = sublinear_tf\n",
    "    \n",
    "    def fit(self, X):\n",
    "        doc_freq = np.zeros(self.vocab_size, dtype=np.float64)\n",
    "        for data in X:\n",
    "            for token_id in set(data):\n",
    "                doc_freq[token_id] += 1\n",
    "        doc_freq += int(self.smooth_idf)\n",
    "        n_samples = len(X) + int(self.smooth_idf)\n",
    "        self.idf = np.log(n_samples / doc_freq) + 1\n",
    "    \n",
    "    def transform(self, X):\n",
    "        assert hasattr(self, 'idf')\n",
    "        term_freq = np.zeros((len(X), self.vocab_size), dtype=np.float64)\n",
    "        for i, data in enumerate(X):\n",
    "            for token in data:\n",
    "                term_freq[i, token] += 1\n",
    "        if self.sublinear_tf:\n",
    "            term_freq = np.log(term_freq + 1)\n",
    "        Y = term_freq * self.idf\n",
    "        if self.norm:\n",
    "            row_norm = (Y**2).sum(axis=1)\n",
    "            row_norm[row_norm == 0] = 1\n",
    "            Y /= np.sqrt(row_norm)[:, None]\n",
    "        return Y\n",
    "    \n",
    "    def fit_transform(self, X):\n",
    "        self.fit(X)\n",
    "        return self.transform(X)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
